rm(list = ls())

library(data.table)
library(estimatr)

add_backticks = function(x) {
  paste0("`", x, "`")
}

x_lm_formula = function(x) {
  paste(add_backticks(x), collapse = " + ")
}


load('./data/panel_month_dummies.RData')

panel[,covid_death:=covid2*excess_deaths_jan_d]
panel[is.na(extr_right_mayor), extr_right_mayor:=0]

month_cols <-  grep('month_2', colnames(panel), value=TRUE)

panel[,less_college:=(100-educ_diplBA)]
panel[,(paste('educ',month_cols,sep = '_')):= lapply(.SD, function(x) 
  x * panel[['less_college']] ), .SDcols = month_cols]

panel[,(paste('foreign',month_cols,sep = '_')):= lapply(.SD, function(x) 
  x * panel[['foreign_pop']] ), .SDcols = month_cols]

panel[,(paste('old',month_cols,sep = '_')):= lapply(.SD, function(x) 
  x * panel[['above75_share']] ), .SDcols = month_cols]

panel[,(paste('localgov',month_cols,sep = '_')):= lapply(.SD, function(x) 
  x * panel[['extr_right_mayor']] ), .SDcols = month_cols]

educ_cols <-  grep('educ_month', colnames(panel), value=TRUE)
educ_cols <- educ_cols[-157]
foreign_cols <- grep('foreign_month', colnames(panel), value=TRUE)
foreign_cols <- foreign_cols[-157]
old_cols <- grep('old_month', colnames(panel), value=TRUE)
old_cols <- old_cols[-157]
gov_cols <- grep('localgov_month', colnames(panel), value=TRUE)
gov_cols <- gov_cols[-157]

flex_controls <- c(educ_cols, foreign_cols, old_cols, gov_cols)

panel[,time:= as.numeric(round((month-as.Date('2020-01-01',format='%Y-%m-%d'))/(365.25/12)))]

province_cols <- grep('code_province_', colnames(panel), value=TRUE)
panel[,(paste('trend',province_cols,sep = '_')):= lapply(.SD, function(x) 
  x * panel[['time']] ), .SDcols = province_cols]
trend_cols <- grep('trend_code_province', colnames(panel), value=TRUE)
trend_cols <- trend_cols[-1]

out_death_jan <- lm_robust(hc_pc_asians ~ covid_death, data=panel, se_type='stata',
                           clusters = panelvar, fixed_effects = ~ panelvar + month)
save(out_death_jan, file = './output/out_death_jan.RData')
rm(out_death_jan)

formula_flex_controls <- as.formula(paste("hc_pc_asians ~ covid_death +", x_lm_formula(flex_controls)))

out_death_jan_con <- lm_robust(formula = formula_flex_controls,
                               data=panel, se_type='stata',
                               clusters = panelvar, fixed_effects = ~ panelvar + month)

save(out_death_jan_con, file = './output/out_death_jan_con.RData')

rm(out_death_jan_con)

panel[,trend_death:=time*excess_deaths_jan_d]

formula_flex_controls_trend <- as.formula(paste("hc_pc_asians ~ covid_death + trend_death +", x_lm_formula(flex_controls)))

out_death_jan_con_trend <- lm_robust(formula = formula_flex_controls_trend,
                                     data=panel, se_type='stata',
                                     clusters = panelvar, fixed_effects = ~ panelvar + month)

save(out_death_jan_con_trend, file = './output/out_death_jan_con_trend.RData')

rm(out_death_jan_con_trend)

flex_controls <- c(educ_cols, foreign_cols, old_cols, gov_cols, trend_cols)

formula_flex_controls <- as.formula(paste("hc_pc_asians ~ covid_death + trend_death +", x_lm_formula(flex_controls)))

out_death_jan_con_trend_all <- lm_robust(formula = formula_flex_controls,
                                         data=panel, se_type='stata',
                                         clusters = panelvar, fixed_effects = ~ panelvar + month)

save(out_death_jan_con_trend_all, file = './output/out_death_jan_con_trend_all.RData')

## to produce output for row 'Average Hate Crimes' in Panel A 

panel_death <- panel[!is.na(hc_pc_asians) & !is.na(excess_deaths_jan_d)]
rm(panel)
panel_death[,treat:=covid2*excess_deaths_jan_d]

mod <- lm_robust(treat ~ excess_deaths_jan_d + covid2, data=panel_death)
weight <- ((panel_death$treat - mod$fitted.values))^2
panel_death[,effweight:=weight]

effective_sample_death <- panel_death[excess_deaths_jan_d==0,sum(effweight*hc_pc_asians)/sum(effweight),by=covid2]

mod <- lm_robust(treat ~ excess_deaths_jan_d + covid2, se_type='stata',
                 data=panel_death, fixed_effects = ~ panelvar + month)
weight <- ((panel_death$treat - mod$fitted.values))^2
panel_death[,effweight:=weight]

effective_sample_death_fe <- panel_death[excess_deaths_jan_d==0,sum(effweight*hc_pc_asians)/sum(effweight),by=covid2]
nominal_sample_death <- panel_death[excess_deaths_jan_d==0,mean(hc_pc_asians),by=covid2]

rm(mod)

flex_controls <- c(educ_cols, foreign_cols, old_cols, gov_cols)

formula_flex_controls <- as.formula(paste("treat ~ excess_deaths_jan_d + covid2 +", x_lm_formula(flex_controls)))

mod <- lm_robust(formula = formula_flex_controls,
                 data=panel_death, se_type='stata',
                 fixed_effects = ~ panelvar + month)
weight <- ((panel_death$treat - mod$fitted.values))^2
panel_death[,effweight:=weight]

effective_sample_death_fe_con <- panel_death[excess_deaths_jan_d==0,sum(effweight*hc_pc_asians)/sum(effweight),by=covid2]

rm(mod)

panel_death[,time:= as.numeric(round((month-as.Date('2020-01-01',format='%Y-%m-%d'))/(365.25/12)))]
panel_death[,trend_death:=time*excess_deaths_jan_d]

formula_flex_controls_trend <- as.formula(paste("treat ~ excess_deaths_jan_d + covid2 + trend_death +", x_lm_formula(flex_controls)))

mod <- lm_robust(formula = formula_flex_controls_trend,
                 data=panel_death, se_type='stata',
                 fixed_effects = ~ panelvar + month)

weight <- ((panel_death$treat - mod$fitted.values))^2
panel_death[,effweight:=weight]

effective_sample_death_fe_con_trend <- panel_death[excess_deaths_jan_d==0,sum(effweight*hc_pc_asians)/sum(effweight),by=covid2]

rm(mod)

province_cols <- grep('code_province_', colnames(panel_death), value=TRUE)
panel_death[,(paste('trend',province_cols,sep = '_')):= lapply(.SD, function(x) 
  x * panel_death[['time']] ), .SDcols = province_cols]
trend_cols <- grep('trend_code_province', colnames(panel_death), value=TRUE)
trend_cols <- trend_cols[-1]

flex_controls <- c(educ_cols, foreign_cols, old_cols, gov_cols, trend_cols)

formula_flex_controls <- as.formula(paste("treat ~ excess_deaths_jan_d + covid2 + trend_death +", x_lm_formula(flex_controls)))

mod <- lm_robust(formula = formula_flex_controls,
                 data=panel_death, se_type='stata',
                 fixed_effects = ~ panelvar + month)

weight <- ((panel_death$treat - mod$fitted.values))^2
panel_death[,effweight:=weight]

effective_sample_death_fe_con_trend_prov <- panel_death[excess_deaths_jan_d==0,sum(effweight*hc_pc_asians)/sum(effweight),by=covid2]

rm(mod)

save(nominal_sample_death, effective_sample_death, effective_sample_death_fe, effective_sample_death_fe_con,
     effective_sample_death_fe_con_trend, effective_sample_death_fe_con_trend_prov, file='./output/effective_sample_data_death_jan.RData')
